Monitoring Assessment ILK Literature Corpus

IPBES Transformative Change Assessment

Authors

Rainer M. Krug

yyy xxx

Doi

DOI License: CC BY 4.0

Part of the Data Management Report DOI

Disclaimer

This is a technical background document for the IPBES MONITORING ASSESSMENT. It provides technical details and implementation settings for the data management report of the MTA ILK Corpus and its usage. The sole purpose of this document is to document the workflows used to produce statistics, figures and maps, to document the source of the data and to make the process transparent and reproducible.

Contributors

Assessment Experts

TO BE ADDED

Data and Knowledge TSU

  • Niamir, Aidin ORCID
  • Gudde, Renske ORCID

Metadata

Short Title / Tag

MTA_ILK / [MTA_ILK]

Code repo

Github repository

Build No: 76

Introduction

Schematic Overview

TODO: Will be added

Show the code
# Export the PlantUML overview diagram as PDF, SVG and PNG. The export is
# skipped when at least three files matching the base name already exist in
# the figures directory.
basename <- file.path("figures", "mta_ilk_overview")

# Number of files in the figure directory whose name matches the base name.
nf <- length(
  list.files(
    path = dirname(basename),
    pattern = basename(basename)
  )
)

if (nf < 3) {
  # Read the diagram source, collapse it into a single string and hand it to
  # plantuml::plantuml().
  puml <- plantuml::plantuml(
    paste(
      readLines(file.path("input", "mta_ilk.plantuml")),
      collapse = "\n"
    )
  )

  # One export per output format: pdf, svg, png.
  plantuml::get_graph(puml, file = paste0(basename, ".pdf"))
  plantuml::get_graph(puml, file = paste0(basename, ".svg"))
  plantuml::get_graph(puml, file = paste0(basename, ".png"))
}

Schematic overview of the MTA ILK Corpus workflow

Search Terms

Here are the search terms used in this document. They were provided by the authors; the TSU for data and knowledge management adapted them where necessary to make them suitable for a search in OpenAlex.

ILK

Show the code
# Print the `ilk` element of the `search_terms` object read via tar_read().
tar_read(search_terms)$ilk |>
  cat()
"Indigenous People" OR
"Indigenous Peoples" OR
"Indigenous community" OR
"local community" OR
IPLC OR
Aboriginal OR
Aborigine OR
"First nation" OR
Amerindian OR
Autochthonous OR
peasant OR
peasants OR
tribe OR
tribes OR
tribal OR
ethnic OR
smallholder OR
smallholders OR
"small-holder" OR
"small-holders" OR
subsistence OR
artisan OR
artisanal OR
fisher OR
fishers OR
hunter OR
hunters OR
hunting OR
gatherer OR
gatherers OR
gathering OR
pastoralist OR
pastoralists OR
pastoralism OR
herder OR
herders OR
herding OR
transhumance OR
agroforestry OR
swidden OR
aquaculture OR
"Non timber forest product" OR
"Non-timber forest product" OR
"nontimber forest product" OR
customary OR
"Indigenous knowledge" OR
"Indigenous ecological knowledge" OR
"Indigenous and local knowledge" OR
ILK OR
"local knowledge" OR
"traditional ecological knowledge" OR
TEK OR
"traditional knowledge" OR
TK OR
bioculture OR
biocultural OR
"bio-culture" OR
"bio-cultural" OR
ethnobotany OR
ethnoecology OR
ethnopharmacology OR
cosmovision OR
participation OR
participatory OR
"community-based" OR
"community based" OR
"co-design" OR
codesign OR
"co-produce" OR
"co-production" OR
coproduce OR
coproduction OR
FPIC OR
"free prior and informed consent" OR
"free prior and informed consent" OR
"data sovereignty" OR
"access and benefit sharing"

Monitoring

Show the code
#|

# Print the `monitoring` element of the `search_terms` object read via
# tar_read().
tar_read(search_terms)$monitoring |>
  cat()
monitor OR
monitoring OR
indicator OR
indicators OR
observation OR
observing 

Methods

Get and calculate Data from OpenAlex

These data are gathered from OpenAlex directly via API calls. Consequently, each time the data are gathered, the number of hits will likely increase. The timestamp of the search is saved in the attribute timestamp of the saved object. These data are used to assess the quality of the TCA Corpus.

#' Count the OpenAlex hits for each search term
#'
#' Every element of `search_terms` is passed through `compact_st()` and used
#' as a title-and-abstract search in `openalexR::oa_fetch()` with
#' `count_only = TRUE`.
#'
#' @param search_terms Named list of search terms; the names are used for
#'   progress messages and (with "st_" and "f_" removed) as row names.
#' @return A data.frame with one row per search term. The columns `page` and
#'   `per_page` are dropped and `count` is formatted as text with a thousands
#'   separator. The time of the query is stored in the attribute `timestamp`.
get_search_term_hits <- function(search_terms) {
  term_names <- names(search_terms)

  # Fetch the count information for one named search term.
  fetch_count <- function(term_name) {
    message("getting '", term_name, "' ...")
    query <- compact_st(search_terms[[term_name]])
    unlist(
      openalexR::oa_fetch(
        title_and_abstract.search = query,
        count_only = TRUE,
        verbose = TRUE
      )
    )
  }

  # Stack the per-term results row-wise and convert them to a data.frame.
  per_term <- lapply(term_names, fetch_count)
  hits <- as.data.frame(do.call(rbind, per_term))

  hits <- dplyr::mutate(hits, page = NULL, per_page = NULL)
  hits <- dplyr::mutate(
    hits,
    count = formatC(count, format = "f", big.mark = ",", digits = 0)
  )

  # Row names are the term names without the "st_" and "f_" markers.
  row_labels <- gsub(pattern = "st_", replacement = "", x = term_names)
  rownames(hits) <- gsub(pattern = "f_", replacement = "", x = row_labels)

  attr(hits, "timestamp") <- Sys.time()

  hits
}

Assess individual search terms

ILK

#' Assess the individual lines of the ILK search term
#'
#' Calls `assess_search_term()` twice on the lines of `st_ilk_fn`, once with
#' `excl_others = TRUE` and once with `excl_others = FALSE`, each time using
#' `search_terms$monitoring` as the AND term, and binds both results
#' column-wise.
#'
#' @param st_ilk_fn Path of the file containing the ILK search term, one term
#'   per line.
#' @param search_terms List of search terms; the element `monitoring` is used.
#' @return The column-bound assessments with a single `term` column
#'   (`excl.term` renamed, `incl.term` dropped) and the attributes
#'   `timestamp` and `AND_term`.
get_search_assessment_ilk <- function(st_ilk_fn, search_terms) {
  # Run one assessment; only `excl_others` differs between the two runs.
  assess <- function(excl_others) {
    assess_search_term(
      st = readLines(st_ilk_fn),
      AND_term = search_terms$monitoring,
      remove = " OR$",
      excl_others = excl_others,
      verbose = TRUE
    )
  }

  assessments <- list(
    excl = assess(TRUE),
    incl = assess(FALSE)
  )

  # cbind() prefixes the columns with the list names ("excl.", "incl.").
  combined <- do.call(cbind, assessments)
  combined <- dplyr::rename(combined, term = excl.term)
  combined <- dplyr::mutate(combined, incl.term = NULL)

  attr(combined, "timestamp") <- Sys.time()
  attr(combined, "AND_term") <- search_terms$monitoring

  combined
}

Monitoring

#' Assess the individual lines of the monitoring search term
#'
#' Calls `assess_search_term()` twice on the lines of `st_monitoring_fn`,
#' once with `excl_others = TRUE` and once with `excl_others = FALSE`, each
#' time using `search_terms$ilk` as the AND term, and binds both results
#' column-wise.
#'
#' @param st_monitoring_fn Path of the file containing the monitoring search
#'   term, one term per line.
#' @param search_terms List of search terms; the element `ilk` is used.
#' @return The column-bound assessments with a single `term` column
#'   (`excl.term` renamed, `incl.term` dropped) and the attributes
#'   `timestamp` and `AND_term`.
get_search_assessment_monitoring <- function(st_monitoring_fn, search_terms) {
  # The AND term is looked up once and reused for both assessments and for
  # the `AND_term` attribute. The inclusive run previously used
  # `search_terms$ilkg`, which does not exist and evaluates to NULL, so the
  # inclusive counts were computed without the ILK term.
  and_term <- search_terms$ilk

  result <- list(
    excl = assess_search_term(
      st = readLines(st_monitoring_fn),
      AND_term = and_term,
      remove = " OR$",
      excl_others = TRUE,
      verbose = TRUE
    ),
    incl = assess_search_term(
      st = readLines(st_monitoring_fn),
      AND_term = and_term,
      remove = " OR$",
      excl_others = FALSE,
      verbose = TRUE
    )
  ) |>
    # cbind() prefixes the columns with the list names ("excl.", "incl.").
    do.call(what = cbind) |>
    dplyr::rename(
      term = excl.term
    ) |>
    dplyr::mutate(
      incl.term = NULL
    )

  attributes(result)$timestamp <- Sys.time()
  attributes(result)$AND_term <- and_term

  return(result)
}

Get Key Works

#' Fetch the key papers from OpenAlex
#'
#' @param key_paper Object with a `doi` element holding the DOIs of the key
#'   papers.
#' @return The result of `openalexR::oa_fetch()` for the non-missing cleaned
#'   DOIs, passed through `abbreviate_authorships()`, with the time of the
#'   query stored in the attribute `timestamp`.
get_key_works <- function(key_paper) {
  # Clean the DOIs and drop the ones that are missing afterwards.
  cleaned_dois <- IPBES.R::doi_clean(key_paper$doi)
  cleaned_dois <- cleaned_dois[!is.na(cleaned_dois)]

  works <- abbreviate_authorships(
    openalexR::oa_fetch(
      entity = "works",
      doi = cleaned_dois
    )
  )

  attr(works, "timestamp") <- Sys.time()

  works
}

Check Key Papers in Search

#' Flag which key papers are found by the full search term
#'
#' Queries OpenAlex for works that match `search_terms$full` in title and
#' abstract and whose DOI is one of the key paper DOIs, then marks each row
#' of `key_works` whose `doi` is among the returned DOIs.
#'
#' @param key_paper Object with a `doi` element holding the DOIs of the key
#'   papers.
#' @param key_works Table of key works with (at least) the columns `id`,
#'   `doi`, `citation`, `title` and `abstract`.
#' @param search_terms List of search terms; the element `full` is used.
#' @return `key_works` reduced to the columns `id`, `in_search`, `doi`,
#'   `citation`, `title` and `abstract`, with the time of the query stored in
#'   the attribute `timestamp`.
get_key_paper_in_search <- function(key_paper, key_works, search_terms) {
  # Clean the DOIs and drop the ones that are missing afterwards.
  cleaned_dois <- IPBES.R::doi_clean(key_paper$doi)
  cleaned_dois <- cleaned_dois[!is.na(cleaned_dois)]

  # Only the DOI field is requested from OpenAlex.
  matches <- openalexR::oa_fetch(
    entity = "works",
    title_and_abstract.search = search_terms$full,
    doi = cleaned_dois,
    options = list(select = c("doi"))
  )
  dois_in_search <- unname(unlist(matches))

  flagged <- dplyr::mutate(
    key_works,
    in_search = doi %in% dois_in_search
  )
  flagged <- dplyr::select(
    flagged,
    id,
    in_search,
    doi,
    citation,
    title,
    abstract
  )

  attr(flagged, "timestamp") <- Sys.time()

  flagged
}

Results

Search for Search Terms

The search was done at 2025-03-11 13:42:35.920093.

Show the code
# Render the hit counts per search term as a table.
# NOTE(review): the labels are assigned by position, so this presumably relies
# on the rows being ordered monitoring, ILK, combined - confirm.
tar_read(search_term_hits) |>
  dplyr::mutate(
    "Search Term" = c(
      sprintf(
        "[Monitoring](%s)",
        file.path("input", "search terms", "monitoring.txt")
      ),
      sprintf(
        "[ILK](%s)",
        file.path("input", "search terms", "ilk.txt")
      ),
      "Combined (monitoring AND ILK)"
    )
  ) |>
  dplyr::mutate(db_response_time_ms = NULL) |>
  dplyr::select("Search Term", everything()) |>
  dplyr::rename("Number of hits" = count) |>
  knitr::kable(caption = "Search term hits", row.names = FALSE)
Search term hits
Search Term Number of hits
Monitoring 7,686,122
ILK 4,276,391
Combined (monitoring AND ILK) 383,347

Assessment of Search Terms

To do the assessment, the search term was split into individual lines (as in the original files ilk and monitoring), and two searches were done for each of the terms:

  1. A search term AND monitoring and the count is shown in the column Count (inclusive)
  2. A search (term AND NOT OTHER TERMS) OR monitoring and the count is shown in the column Count (exclusive)

The column Count (exclusive) is the contribution of this search term. In other words, if the value is zero, the term does not contribute anything to the results corpus. If the number is large, it might be worth taking a closer look at this term, as it might have a broader meaning than initially thought.

The column Count (inclusive) is the number of times the actual term is found in the corpus.

ILK

The searches for the ILK assessment were done at 2025-03-11 14:00:49.12223.

Show the code
# Render the ILK assessment as a table, sorted by descending exclusive count.
tar_read(search_assessment_ilk) |>
  dplyr::arrange(dplyr::desc(excl.count)) |>
  dplyr::mutate(
    # Format both count columns with a thousands separator and no decimals.
    dplyr::across(
      c(excl.count, incl.count),
      \(counts) formatC(counts, format = "f", big.mark = ",", digits = 0)
    )
  ) |>
  dplyr::rename(
    Term = term,
    "Count (exclusive)" = excl.count,
    "Count (inclusive)" = incl.count
  ) |>
  knitr::kable(caption = "Assessment ILK Search Term", row.names = FALSE)
Assessment ILK Search Term
Term Count (exclusive) Count (inclusive)
participation 90,396 107,164
ethnic 34,074 41,162
gathering 20,371 25,731
participatory 15,578 24,124
aquaculture 10,533 11,646
“local community” 8,829 14,110
ethnobotany 6,655 8,810
TK 5,049 5,471
agroforestry 4,948 7,497
hunting 4,366 6,733
customary 3,290 4,611
TEK 2,662 3,089
biocultural 2,197 3,736
pastoralism 1,987 2,890
subsistence 1,938 3,306
tribal 1,788 3,882
Aboriginal 1,703 3,344
Autochthonous 1,462 1,632
“co-design” 1,438 2,068
“local knowledge” 982 2,065
“Indigenous community” 618 1,849
“co-production” 588 994
“Indigenous knowledge” 511 1,414
“traditional knowledge” 436 1,202
“co-produce” 405 661
ILK 341 419
“First nation” 338 804
codesign 152 223
coproduction 143 274
Amerindian 122 228
Aborigine 112 263
coproduce 97 169
“traditional ecological knowledge” 50 307
swidden 49 117
transhumance 47 126
ethnopharmacology 40 74
“data sovereignty” 39 74
cosmovision 30 56
“access and benefit sharing” 30 54
IPLC 22 40
“nontimber forest product” 18 39
“bio-cultural” 15 47
ethnoecology 15 46
bioculture 8 9
“Indigenous ecological knowledge” 6 36
FPIC 6 22
“free prior and informed consent” 5 42
“free prior and informed consent” 5 42
“bio-culture” 4 7
“Indigenous People” 0 4,208
“Indigenous Peoples” 0 4,208
peasant 0 2,356
peasants 0 2,356
tribe 0 14,820
tribes 0 14,820
smallholder 0 3,043
smallholders 0 3,043
“small-holder” 0 343
“small-holders” 0 343
artisan 0 2,724
artisanal 0 2,724
fisher 0 14,511
fishers 0 14,511
hunter 0 5,365
hunters 0 5,365
gatherer 0 64,418
gatherers 0 64,418
pastoralist 0 738
pastoralists 0 738
herder 0 11,197
herders 0 11,197
herding 0 11,197
“Non timber forest product” 0 2,427
“Non-timber forest product” 0 2,427
“Indigenous and local knowledge” 0 47
“community-based” 0 20,837
“community based” 0 20,837

Monitoring

The searches for the Monitoring assessment were done at 2025-03-11 14:00:57.52663.

Show the code
# Render the monitoring assessment as a table, sorted by descending exclusive
# count.
tar_read(search_assessment_monitoring) |>
  dplyr::arrange(dplyr::desc(excl.count)) |>
  dplyr::mutate(
    # Format both count columns with a thousands separator and no decimals.
    dplyr::across(
      c(excl.count, incl.count),
      \(counts) formatC(counts, format = "f", big.mark = ",", digits = 0)
    )
  ) |>
  dplyr::rename(
    Term = term,
    "Count (exclusive)" = excl.count,
    "Count (inclusive)" = incl.count
  ) |>
  knitr::kable(
    caption = "Assessment Monitoring Search Term",
    row.names = FALSE
  )
Assessment Monitoring Search Term
Term Count (exclusive) Count (inclusive)
observation 2,424,399 2,743,837
observing 221,548 275,290
monitor 0 3,596,285
monitoring 0 3,596,283
indicator 0 1,583,962
indicators 0 1,583,962

Key Papers in MTA ILK Corpus

Show the code
# Render the key papers as an interactive table, papers found by the search
# first; `id` and `doi` are turned into links that open in a new tab.
tar_read(key_paper_in_search) |>
  dplyr::arrange(desc(in_search)) |>
  dplyr::mutate(
    id = sprintf("<a href='%s' target='_blank'>%s</a>", id, id),
    doi = sprintf("<a href='%s' target='_blank'>%s</a>", doi, doi)
  ) |>
  # escape = FALSE is passed so the HTML anchors are not escaped.
  IPBES.R::table_dt(escape = FALSE)

Reuse

Citation

BibTeX citation:
@report{krug,
  author = {Krug, Rainer M. and xxx, yyy},
  title = {Monitoring {Assessment} {ILK} {Literature} {Corpus}},
  doi = {10.5281/zenodo.xxxxxxxx},
  langid = {en}
}
For attribution, please cite this work as:
Krug, Rainer M., and yyy xxx. n.d. “Monitoring Assessment ILK Literature Corpus.” IPBES Monitoring Assessment. https://doi.org/10.5281/zenodo.xxxxxxxx.